# encoding=utf8

import os
import fnmatch
from win32com import client as wc
from win32com.client import Dispatch


def word2txt(filePath, savePath=''):
    """Export a ``.doc``/``.docx`` file as a ``.txt`` file via COM automation.

    The output file keeps the source file's base name with the extension
    replaced by ``.txt``.

    Args:
        filePath: Path of the document to convert. Its file name must match
            ``*.doc`` or ``*.docx`` (matched with ``fnmatch``).
        savePath: Directory that receives the ``.txt`` file. When empty, the
            directory of ``filePath`` is used.

    Returns:
        None. If the file name has an unsupported extension, a message is
        printed and nothing is opened or written.
    """
    # The document is opened by a separate application over COM, which may
    # not share this process's working directory, so pass absolute paths.
    filePath = os.path.abspath(filePath)
    dirs, filename = os.path.split(filePath)

    # Check the longer extension first; the first match wins.
    new_name = ''
    for ext in ('.docx', '.doc'):
        if fnmatch.fnmatch(filename, '*' + ext):
            new_name = filename[:-len(ext)] + '.txt'
            break

    if not new_name:
        # Unsupported extension: stop here instead of launching the COM
        # application and trying to save onto a directory path.
        print('文件格式错误')
        return

    if not savePath:
        savePath = dirs

    new_path = os.path.abspath(os.path.join(savePath, new_name))

    # Load the word-processor application used to extract the text.
    wordApp = wc.Dispatch('KWPS.Application')
    document = wordApp.Documents.Open(filePath)
    try:
        # 4 is the file-format code used here for text export.
        # NOTE(review): in Word's WdSaveFormat enumeration 4 is
        # wdFormatDOSText; confirm the server behind 'KWPS.Application'
        # uses the same numbering.
        document.SaveAs(new_path, 4)
    finally:
        # Close the document even when the export fails.
        document.Close()


if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes followed by
    # characters that are not valid escape sequences in a normal literal.
    word2txt(r'F:\python project\KNN\DataCapture\语音助手1.0版本.docx')